Behavioural findings regarding the Illusion Game.
library(tidyverse)
library(ggdist)
library(ggside)
library(easystats)
library(patchwork)
library(brms)
df <- read.csv("data/study1.csv") |>
mutate(
Date = as.Date(Date),
Participant = fct_reorder(Participant, Date),
Screen_Refresh = as.character(Screen_Refresh),
Illusion_Side = as.factor(Illusion_Side),
Block = as.factor(Block),
Education = fct_relevel(Education, "Master", "Bachelor", "High School", "Other")
)
# plot(estimate_density(dfraw[dfraw$Participant == "60684f29dbfe1bb2059e5e27_rkqoy", "RT"]))
# Dear participant, thank you for participating in our study. Unfortunately, our system detected multiple issues in your data (such as implausibly short responses, and random-like pattern of answers), which make the data unusable for us. We understand that you might have been in a hurry, or had some other issues, and kindly ask you to return your participation. We hope to open-up more slots in the future would you be interested to participate again.
outliers <- c(
# Half of the trials are of very short RT
# Prolific Status: REJECTED
"60684f29dbfe1bb2059e5e27_rkqoy",
# Error rate of 47.9%
# Prolific Status: RETURN REQUESTED
"61280140171ec546e87ed8cb_qdlgy",
# Error rate of 46.2%
# Prolific Status: RETURN REQUESTED
"614f36fd81c78b7a125c4262_6ax4g",
# Error rate of 42.1% and very large RT SD
# Prolific Status: RETURN REQUESTED
"5d398380b37ab1000111fac3_2nxxh",
# Block n2 with very short RTs
# Prolific Status: RETURNED
"5e860198a846e30497df5189_6e43s",
# Error rate of 46.2% and short RTs
# Prolific Status: RETURN REQUESTED
"615f319bb341cf2f306d858d_qsaq5"
)
We removed 6 participants upon inspection of the average error rage (when close to 50%, suggesting random answers) and/or when the reaction time distribution was implausibly fast.
For each block, we computed the error rate and, if more than 50%, we discarded the whole block (as it likely indicates that instructions got mixed up, for instance participants were selecting the smaller instead of the bigger circle).
dfsub <- df |>
group_by(Participant) |>
summarize(
# n = n(),
Error = sum(Error) / n(),
RT_Mean = mean(RT),
RT_SD = sd(RT),
) |>
ungroup() |>
arrange(desc(Error))
knitr::kable(dfsub) |>
kableExtra::row_spec(which(dfsub$Participant %in% outliers), background = "#EF9A9A")
| Participant | Error | RT_Mean | RT_SD |
|---|---|---|---|
| 61280140171ec546e87ed8cb_qdlgy | 0.479 | 262 | 296 |
| 615f319bb341cf2f306d858d_qsaq5 | 0.465 | 263 | 372 |
| 614f36fd81c78b7a125c4262_6ax4g | 0.462 | 630 | 611 |
| 5d398380b37ab1000111fac3_2nxxh | 0.421 | 507 | 1679 |
| 5e860198a846e30497df5189_6e43s | 0.402 | 492 | 725 |
| 6104696d120a50b0ec51aa1f_m56p5 | 0.300 | 723 | 579 |
| 61572ca3e91309ebe876a9db_8cqnp | 0.287 | 659 | 333 |
| 5d9091ff391a60058a7711b5_dvz9e | 0.269 | 578 | 172 |
| 5bb511c6689fc5000149c703_r4kjk | 0.262 | 716 | 271 |
| 5ab308940527ba0001c15a9f_rnclh | 0.262 | 588 | 206 |
| 6106b7157977b80c497314f8_d7ukm | 0.260 | 718 | 1294 |
| 61088fcf393bde58359957b3_ol8jp | 0.255 | 837 | 711 |
| 60684f29dbfe1bb2059e5e27_rkqoy | 0.251 | 599 | 1326 |
| 611eb7284490ba01cddfbe98_om6zf | 0.246 | 699 | 414 |
| 5bc84a5f0760100001b3b9de_4cxmv | 0.246 | 560 | 197 |
| 60d129f2a122e84175a56425_z2w8h | 0.243 | 693 | 232 |
| 5d7389f193a945001a3ea504_nhua6 | 0.238 | 1160 | 1660 |
| 60dae077e62179eb469e32a4_b4pte | 0.227 | 748 | 243 |
| 5c6b0a27ffc824000191c7d8_5ajt1 | 0.225 | 780 | 427 |
| 5f2f0ac775b91d2e1865c1cc_xkcs9 | 0.219 | 785 | 836 |
| 5ff46a1a99e7cfb2994f7958_f2zg0 | 0.216 | 506 | 150 |
| 5f19559b9665f700090276c4_hsmss | 0.215 | 738 | 375 |
| 5c8ab0f10de08f00016e43a1_pyvgt | 0.213 | 1076 | 557 |
| 5ecd37ee75736a068808fa6c_7fgo5 | 0.210 | 870 | 489 |
| 6166a03f5063db088c458b73_m7w8f | 0.207 | 804 | 378 |
| 606cd013f538ed55e02069b5_vr3v7 | 0.206 | 652 | 367 |
| 5f480e566265722a9b2b2660_0bola | 0.205 | 511 | 147 |
| 605b60879326739b05897042_bpsyp | 0.203 | 627 | 223 |
| 609193e5a0cea97bf00ac6e2_a6zcr | 0.202 | 1133 | 982 |
| 610b0a1bf2434edb31592209_3f1wq | 0.202 | 869 | 424 |
| 5f08583a3d61a604d606c517_o75t7 | 0.201 | 720 | 298 |
| 55eab7fd748092000daa98f2_f10fa | 0.198 | 1110 | 738 |
| 612ba4de7e2b127155f4bb03_ph1f8 | 0.196 | 867 | 652 |
| 5e04595a4fa02aefdb9c9ced_n3rey | 0.189 | 983 | 830 |
| 61114f10ae21c59c0ed3d106_jw6v8 | 0.187 | 711 | 195 |
| 5f14886922a7d20725a22cde_9awyt | 0.186 | 803 | 397 |
| 60a6dd8779e3de1097e5d50a_t4wyc | 0.185 | 846 | 765 |
| 5c73e5d89b46930001ee7edc_ydo84 | 0.182 | 1045 | 1082 |
| 5e84f2663a34f20c3907e237_rt0oo | 0.181 | 1001 | 562 |
| 5ebde9baaefecd1325ef23c7_lphsv | 0.176 | 1307 | 1091 |
| 5d59a9d909f4300001de0c3b_l125y | 0.175 | 1146 | 900 |
| 563bb259be9cac0005aab7ab_te1z4 | 0.174 | 703 | 243 |
| 5fd97d5b40332e276ea58209_xmckd | 0.173 | 1046 | 918 |
| 60ba6031b6dde7c5b869bf74_gqplc | 0.173 | 616 | 381 |
| 5dfae1f373d7248254527108_0rb1e | 0.171 | 927 | 550 |
| 60b8e0ec34553723e3d6504d_ju18r | 0.170 | 769 | 312 |
| 5d5051e31025380015dc59b8_dwrdh | 0.157 | 848 | 364 |
| 60366cfe9748fc2b0ccbc9d0_ox8hj | 0.156 | 712 | 383 |
| 5bce155e561901000121006f_49472 | 0.142 | 1109 | 863 |
| 5ccc3dd7a758ba00133c0763_lwl1g | 0.139 | 895 | 816 |
| 5a0b46e0844c7a00015e4d13_jedw6 | 0.124 | 741 | 331 |
| 5eb0205cac7ad4085dc32a50_5xekt | 0.092 | 884 | 509 |
temp <- df |>
group_by(Participant, Illusion_Type, Block) |>
summarize(ErrorRate_per_block = sum(Error) / n()) |>
ungroup() |>
arrange(desc(ErrorRate_per_block))
temp2 <- temp |>
filter(ErrorRate_per_block >= 0.5) |>
group_by(Illusion_Type, Block) |>
summarize(n = n()) |>
arrange(desc(n), Illusion_Type) |>
ungroup() |>
mutate(n_trials = cumsum(n * 56),
p_trials = n_trials / nrow(df))
# knitr::kable(temp2)
p1 <- temp |>
estimate_density(at = c("Illusion_Type", "Block")) |>
ggplot(aes(x = x, y = y)) +
geom_line(aes(color = Illusion_Type, linetype = Block)) +
geom_vline(xintercept = 0.5, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(y = "Distribution", x = "Error Rate") +
theme_modern()
p2 <- temp2 |>
mutate(Block = fct_rev(Block)) |>
ggplot(aes(x = Illusion_Type, y = p_trials)) +
geom_bar(stat="identity", aes(fill = Block)) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
labs(y = "Percentage of Trials Removed", x = "Illusion Type") +
theme_modern() +
theme(axis.text.x = element_text(angle=45, hjust = 1))
p1 | p2
# Drop
df <- df |>
group_by(Participant, Illusion_Type, Block) |>
mutate(ErrorRate_per_block = sum(Error) / n()) |>
ungroup() |>
filter(ErrorRate_per_block < 0.5) |>
select(-ErrorRate_per_block)
rm(temp, temp2)
# RT distribution
p <- estimate_density(df, select = "RT", at = c("Participant", "Block")) |>
group_by(Participant) |>
normalize(select = "y") |>
ungroup() |>
mutate(color = ifelse(Participant %in% outliers, "red", "blue")) |>
ggplot(aes(x = x, y = y)) +
geom_area(data = normalize(estimate_density(df, select = "RT"), select = "y"), alpha = 0.2) +
geom_line(aes(color = color, group = interaction(Participant, Block), linetype = Block)) +
geom_vline(xintercept = 2500, linetype = "dashed", color = "red") +
scale_color_manual(values=c("red"="red", "blue"="blue"), guide = "none") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
coord_cartesian(xlim = c(0, 3000)) +
theme_modern() +
theme(axis.text.y = element_blank()) +
facet_wrap(~Participant) +
labs(y = "", x = "Reaction Time (ms)")
p
# ggsave("figures/outliers.png", p, width=20, height=15)
# Filter out
df <- filter(df, !Participant %in% outliers)
p1 <- estimate_density(df, select = "RT", at = "Participant") |>
group_by(Participant) |>
normalize(select = "y") |>
ungroup() |>
ggplot(aes(x = x, y = y)) +
geom_area(data = normalize(estimate_density(df, select = "RT"), select = "y"), alpha = 0.2) +
geom_line(aes(color = Participant, group = Participant)) +
geom_vline(xintercept = c(150, 3000), linetype = "dashed", color = "red") +
scale_color_material_d("rainbow", guide = "none") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
coord_cartesian(xlim = c(0, 3500)) +
theme_modern() +
theme(axis.text.y = element_blank()) +
# facet_wrap(~Participant) +
labs(y = "", x = "Reaction Time (ms)")
df$Outlier <- df$RT < 150 | df$RT > 3000
p2 <- df |>
group_by(Participant) |>
summarize(Outlier = sum(Outlier) / n()) |>
mutate(Participant = fct_reorder(Participant, Outlier)) |>
ggplot(aes(x = Participant, y = Outlier)) +
geom_bar(stat = "identity", aes(fill = Participant)) +
scale_fill_material_d("rainbow", guide = "none") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
see::theme_modern() +
theme(axis.text.x = element_blank())
p1 | p2
We removed 692 (1.37%) outlier trials (150 ms < RT < 3000 ms).
df <- filter(df, Outlier == FALSE)
dfsub <- df |>
group_by(Participant) |>
select(Participant, Age, Sex, Education, Nationality, Ethnicity, Duration, Break_Duration, Screen_Resolution, Screen_Refresh, Device_OS) |>
slice(1) |>
ungroup()
The final sample included 46 participants (Mean age = 26.7, SD = 7.7, range: [19, 60]; Sex: 39.1% females, 56.5% males, 4.3% other; Education: Master, 17.39%; Bachelor, 41.30%; High School, 39.13%; Other, 2.17%).
plot_distribution <- function(dfsub, what = "Age", title = what, subtitle = "", fill = "orange") {
dfsub |>
ggplot(aes_string(x = what)) +
geom_density(fill = fill) +
geom_vline(xintercept = mean(dfsub[[what]]), color = "red", linetype = "dashed") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
ggtitle(title, subtitle = subtitle) +
theme_modern() +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(face = "italic", hjust = 0.5),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank()
)
}
plot_waffle <- function(dfsub, what = "Nationality") {
ggwaffle::waffle_iron(dfsub, what) |>
# mutate(label = emojifont::fontawesome('fa-twitter')) |>
ggplot(aes(x, y, fill = group)) +
ggwaffle::geom_waffle() +
# geom_point() +
# geom_text(aes(label=label), family='fontawesome-webfont', size=4) +
coord_equal() +
ggtitle(what) +
labs(fill = "") +
theme_void() +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
}
p1 <- plot_distribution(dfsub, "Age", fill = "#FF9800")
p2 <- plot_distribution(dfsub, "Duration", title = "Total Duration", subtitle = "in minutes", fill = "#F44336")
p3 <- plot_distribution(dfsub, "Break_Duration", title = "Break Duration", subtitle = "in minutes", fill = "#3F51B5")
p4 <- plot_waffle(dfsub, "Sex") +
scale_fill_manual(values = c("Male" = "#2196F3", "Female" = "#E91E63", "Other" = "#FF9800"))
p5 <- plot_waffle(dfsub, "Education") +
scale_fill_viridis_d()
p6 <- plot_waffle(dfsub, "Nationality") +
scale_fill_metro_d()
p7 <- plot_waffle(dfsub, "Ethnicity") +
scale_fill_manual(values = c("Latino" = "#FF5722", "Asian" = "#FF9800", "Caucasian" = "#2196F3", "African" = "#4CAF50", "Jewish" = "#9C27B0"))
p8 <- plot_waffle(dfsub, "Screen_Resolution") +
scale_fill_pizza_d()
p9 <- plot_waffle(dfsub, "Device_OS") +
scale_fill_bluebrown_d()
# p10 <- plot_waffle(dfsub, "Screen_Refresh") +
# scale_fill_viridis_d()
(p1 / p2 / p3) | (p4 / p5 / p6) | (p7 / p8 / p9)
plot_descriptive <- function(data, side="leftright") {
if(side == "leftright") {
x <- data[data$Error == 0 & data$Illusion_Side == 1, ]$Answer[1]
x <- tools::toTitleCase(gsub("arrow", "", x))
if(x == "Left") data$Answer <- ifelse(data$Illusion_Side == 1, "Left", "Right")
}
dodge1 <- 0.1 * diff(range(data$Illusion_Difference))
dodge2 <- -0.1 * diff(range(data$Illusion_Strength))
colors <- colorRampPalette(c("#4CAF50", "#009688", "#00BCD4", "#2196F3", "#3F51B5", "#673AB7", "#9C27B0"))(length(unique(data$Illusion_Strength)))
p1 <- data |>
group_by(Illusion_Difference, Illusion_Strength, Answer) |>
summarize(Error = mean(Error)) |>
mutate(Illusion_Strength = as.factor(Illusion_Strength)) |>
ggplot(aes(x = Illusion_Difference, y = Error)) +
geom_bar(aes(fill=Illusion_Strength), position = position_dodge(width=dodge1), stat="identity") +
geom_line(aes(color = Illusion_Strength), position = position_dodge(width=dodge1)) +
scale_y_continuous(limits = c(0, 1), expand = c(0, 0), labels = scales::percent) +
scale_x_continuous(expand = c(0, 0)) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors) +
theme_modern() +
labs(
color = "Illusion Strength", fill = "Illusion Strength",
y = "Probability of Error",
x = "Task Difficulty"
) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
colors <- colorRampPalette(c("#F44336", "#FFC107", "#4CAF50"))(length(unique(data$Illusion_Difference)))
p2 <- data |>
group_by(Illusion_Difference, Illusion_Strength, Answer) |>
summarize(Error = mean(Error)) |>
mutate(Illusion_Difference = as.factor(round(Illusion_Difference, 2))) |>
ggplot(aes(x = Illusion_Strength, y = Error)) +
geom_bar(aes(fill=Illusion_Difference), position = position_dodge(width=dodge2), stat="identity") +
geom_line(aes(color = Illusion_Difference), position = position_dodge(width=dodge2)) +
scale_y_continuous(limits = c(0, 1), expand = c(0, 0), labels = scales::percent) +
scale_x_continuous(expand = c(0, 0)) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors) +
theme_modern() +
labs(
color = "Task Difficulty", fill = "Task Difficulty",
y = "Probability of Error",
x = "Illusion Strength"
) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
if(side == "leftright") {
p <- ((p1 + facet_wrap(~Answer, ncol=2, labeller = "label_both")) /
(p2 + facet_wrap(~Answer, ncol=2, labeller = "label_both"))) +
plot_annotation(title = "Delboeuf Illusion",
theme = theme(plot.title = element_text(face = "bold", hjust = 0.5)))
}
p
}
data <- filter(df, Illusion_Type == "Delboeuf")
plot_descriptive(data)
models <- list()
for(i in 1:3) {
for(j in 1:3) {
m <- glmmTMB::glmmTMB(
Error ~ poly(Illusion_Difference, i) * poly(Illusion_Strength, j) +
(1 | Participant),
data = data,
family = "binomial"
)
if(performance::check_convergence(m)) {
models[[paste0("dif", i, "-str", j)]] <- m
}
# m <- glmmTMB::glmmTMB(
# Error ~ poly(log(Illusion_Difference), i) * poly(Illusion_Strength, j) +
# (1 | Participant),
# data = data,
# family = "binomial"
# )
# if(performance::check_convergence(m)) {
# models[[paste0("dif(log(", i, "))-str", j)]] <- m
# }
m <- glmmTMB::glmmTMB(
Error ~ Illusion_Side * poly(Illusion_Difference, i) * poly(Illusion_Strength, j) +
(1 | Participant),
data = data,
family = "binomial"
)
if(performance::check_convergence(m)) {
models[[paste0("side-dif", i, "-str", j)]] <- m
}
}
}
test <- test_performance(models, reference=3)
perf <- compare_performance(models, metrics = c("BIC", "R2"))
merge(perf, test) |>
arrange(BIC) |>
select(Name, BIC, R2_marginal, BF) |>
mutate(BF = insight::format_bf(BF, name=""))
## Name BIC R2_marginal BF
## 1 dif1-str2 3303 0.378
## 2 dif1-str1 3307 0.400 = 0.101
## 3 dif1-str3 3307 0.404 = 0.085
## 4 dif2-str1 3321 0.384 < 0.001
## 5 dif2-str2 3326 0.365 < 0.001
## 6 side-dif1-str1 3332 0.401 < 0.001
## 7 dif3-str1 3338 0.384 < 0.001
## 8 dif2-str3 3339 0.385 < 0.001
## 9 side-dif1-str2 3343 0.380 < 0.001
## 10 dif3-str2 3349 0.367 < 0.001
## 11 side-dif2-str1 3360 0.387 < 0.001
## 12 side-dif1-str3 3364 0.406 < 0.001
## 13 dif3-str3 3371 0.386 < 0.001
## 14 side-dif3-str1 3386 0.401 < 0.001
## 15 side-dif2-str2 3388 0.369 < 0.001
## 16 side-dif2-str3 3423 0.390 < 0.001
## 17 side-dif3-str2 3430 0.380 < 0.001
## 18 side-dif3-str3 3479 0.424 < 0.001
formula <- brms::bf(
Error ~ Illusion_Difference * poly(Illusion_Strength, 2) +
(1 + Illusion_Difference * poly(Illusion_Strength, 2) | Participant),
family = "bernoulli"
)
model <- brms::brm(formula,
data = data,
algorithm = "sampling",
refresh = 0
)
plot_model <- function(data, model) {
data <- mutate(data, .dots_side = ifelse(Error == 1, "bottom", "top"))
n_lines <- 5
pred <- estimate_relation(model,
at = c("Illusion_Strength", "Illusion_Difference"),
length = c(5, n_lines)) |>
mutate(Illusion_Difference = as.factor(round(Illusion_Difference, 2)))
# Set colors for lines
colors <- colorRampPalette(c("#F44336", "#FFC107", "#4CAF50"))(n_lines)
diffvals <- as.numeric(as.character(unique(sort(pred$Illusion_Difference))))
names(colors) <- diffvals
# Assign color from the same palette to every observation of data (for geom_dots)
data$color <- colors[as.character(diffvals[max.col(-abs(outer(data$Illusion_Difference, diffvals, "-")))])]
# Manual jittering
xrange <- 0.05*diff(range(data$Illusion_Strength))
data$x <- data$Illusion_Strength
data$x[data$x > 0] <- data$x[data$x > 0] - runif(sum(data$x > 0), 0, xrange)
data$x[data$x < 0] <- data$x[data$x < 0] + runif(sum(data$x < 0), 0, xrange)
data$x[round(data$x, 2) == 0] <- data$x[round(data$x, 2) == 0] + runif(sum(round(data$x, 2) == 0), -xrange/2, xrange/2)
pred |>
ggplot(aes(x = Illusion_Strength, y = Predicted)) +
geom_dots(
data = mutate(data,
Illusion_Strength = round(Illusion_Strength, 1)),
aes(x=x, y = Error, group = Error, side = .dots_side),
fill = data$color,
color = NA,
alpha=0.5) +
geom_slab(data=data, aes(y = Error)) +
geom_ribbon(aes(ymin = CI_low, ymax = CI_high, fill = Illusion_Difference, group = Illusion_Difference), alpha = 0.2) +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_hline(yintercept = c(0.05, 0.5, 0.95), linetype = "dotted", alpha = 0.5) +
geom_line(aes(color = Illusion_Difference, group = Illusion_Difference)) +
scale_y_continuous(limits = c(0, 1), expand = c(0, 0), labels = scales::percent) +
scale_x_continuous(expand = c(0, 0)) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors) +
coord_cartesian(xlim=c(min(data$Illusion_Strength), max(data$Illusion_Strength))) +
theme_modern() +
labs(
title = paste0(data$Illusion_Type[1], " Illusion"),
color = "Difficulty", fill = "Difficulty",
y = "Probability of Error",
x = "Illusion Strength"
) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
}
plot_model(data, model)
range(data$Illusion_Difference)
## [1] 0.283 0.894
range(data$Illusion_Strength)
## [1] -2.1 2.1
estimate_relation(
model,
at = list(Illusion_Strength = c(0),
Illusion_Difference = seq(min(data$Illusion_Difference), max(data$Illusion_Difference), length.out=500)),
) |>
select(Illusion_Strength, Illusion_Difference, Error = Predicted) |>
slice(c(which.min(abs(Error - 0.05)), which.min(abs(Error - 0.25)))) |>
mutate(Error = insight::format_value(Error, as_percent=TRUE))
## Illusion_Strength | Illusion_Difference | Error
## ------------------------------------------------
## 0.00 | 0.62 | 4.99%
## 0.00 | 0.34 | 24.95%